#!/usr/bin/python

import sys,os,re,glob,math,glob,signal,traceback
import argparse,subprocess,multiprocessing
from itertools import *
import ocrolib
def _exit_on_interrupt(*_ignored):
    """SIGINT handler: end the script with exit status 1."""
    sys.exit(1)

# Install the handler so that Ctrl-C leads to sys.exit(1).
signal.signal(signal.SIGINT,_exit_on_interrupt)

# Command-line interface.  argparse only substitutes the program name for
# "%(prog)s" (the optparse-style "%prog" would be printed literally).  The
# result is written to the file given with -o, not to standard output.
parser = argparse.ArgumentParser(description = """
%(prog)s [-m charmodel] [-l langmod] [-o result.html] image1.png ...
""")

parser.add_argument("files",nargs='*',default=None,help="input images")

# NOTE(review): -D/--Display, -B/--keep, -Q/--parallel and the *_chunk
# options are accepted below, but nothing in the visible remainder of this
# script reads them -- confirm whether they are still meant to have an effect.
parser.add_argument("-C","--nocheck",action="store_true",help="don't check for correct installation")
parser.add_argument("-D","--Display",help="display",action="store_true")
parser.add_argument("-m","--model",default=ocrolib.default.model,help="character model")
parser.add_argument("-l","--lmodel",default=ocrolib.default.ngraphs,help="language model")
parser.add_argument("-b","--book",default=None,help="book directory to be used for intermediate computations")
parser.add_argument("-B","--keep",action="store_true",help="keep the book directory")
parser.add_argument("-o","--output",default="book.html",help="output file (HTML/hOCR format)")

# External commands used for the individual pipeline stages.
parser.add_argument("--preproc",default="ocropus-nlbin",help="preprocessing command")
parser.add_argument("--pageseg",default="ocropus-gpageseg",help="page segmentation command")
parser.add_argument("--linerec",default="ocropus-lattices",help="line recognition command")
parser.add_argument("--langmod",default="ocropus-ngraphs",help="language modeling command")

parser.add_argument("-Q","--parallel",type=int,default=4)
parser.add_argument("--preproc_chunk",type=int,default=5)
parser.add_argument("--pageseg_chunk",type=int,default=5)
parser.add_argument("--linerec_chunk",type=int,default=100)
parser.add_argument("--langmod_chunk",type=int,default=100)

args = parser.parse_args()
# The file arguments are handed to ocrolib.glob_all, which presumably expands
# glob patterns among them -- verify against ocrolib.
args.files = ocrolib.glob_all(args.files)

# Installation check (skipped with -C/--nocheck): the default models and the
# models selected on the command line must all be found by ocrolib.findfile.
if not args.nocheck:
    download_hint = "cannot find '%s'; run: ocropus-download-models"
    not_found = "%s: cannot find"
    required = [
        (ocrolib.default.model,download_hint),
        (ocrolib.default.ngraphs,download_hint),
        (args.model,not_found),
        (args.lmodel,not_found),
    ]
    for resource,template in required:
        located = ocrolib.findfile(resource,error=0)
        assert located is not None,template%resource

def run(*args,**kw):
    """Run an external command and insist that it exits with status 0.

    Positional arguments are the words of the command line; each one may be
    a single value or a list/tuple of values, which is spliced in place.  The
    first positional argument names the program to execute.

    Keyword arguments become options placed directly after the first
    positional argument: a one-letter name `k=v` yields `-k v`, a longer
    name yields `--name v`.  Options are emitted in sorted name order so the
    command line is deterministic.

    No shell is involved, so the words are passed to the program verbatim
    (glob patterns are not expanded here).

    Raises:
        ValueError: if no program was given.
        AssertionError: if the command exits with a non-zero status.
    """
    groups = [[str(word) for word in arg] if isinstance(arg,(list,tuple)) else [str(arg)]
              for arg in args]
    if not groups or not groups[0]:
        raise ValueError("run() needs the program to execute as its first argument")
    options = []
    for key,value in sorted(kw.items()):
        flag = "--"+key if len(key)>1 else "-"+key
        options += [flag,str(value)]
    # Program first, then the generated options, then the remaining operands.
    command = groups[0]+options+[word for group in groups[1:] for word in group]
    print("# "+" ".join(command))
    # Flush so that the echoed command precedes the child's own output even
    # when stdout is redirected and therefore block-buffered.
    sys.stdout.flush()
    # The call is kept outside of any `assert` so that it still happens under
    # `python -O`; AssertionError is raised explicitly to keep the exception
    # type of the previous `assert`-based check.
    status = subprocess.call(command)
    if status!=0:
        raise AssertionError("command failed with status %d: %s"%(status," ".join(command)))

# Working ("book") directory; without -b its name is derived from the
# process id.
if args.book is None: args.book = "_book-%06d"%os.getpid()
book = args.book

print("book directory "+book)

# os.mkdir raises if the directory already exists, so an existing book
# directory is never reused or overwritten.
# NOTE(review): the directory is not removed at the end of this script,
# whether or not -B/--keep was given.
os.mkdir(book)

# Re-save every input image in the book directory under a sequential name
# (0001.png, 0002.png, ...), going through ocrolib's gray image reader/writer.
for i,fname in enumerate(args.files):
    out = book+"/%04d.png"%(i+1)
    print("%s -> %s"%(fname,out))
    image = ocrolib.read_image_gray(fname)
    ocrolib.write_image_gray(out,image)

# The patterns below are passed to the stage commands unexpanded; presumably
# each command expands them itself -- confirm against the ocropus tools.
print("\n=== preprocess\n")
run(args.preproc,book+"/????.png")

print("\n=== page segmentation\n")
run(args.pageseg,book+"/????.png")

print("\n=== line recognition\n")
run(args.linerec,book+"/????/??????.png",m=args.model)

print("\n=== language modeling\n")
run(args.langmod,book+"/????/??????.lattice",l=args.lmodel)

# Write the output of ocropus-hocr to the output file.  The command is started
# without a shell, so book and output paths containing quotes or other shell
# metacharacters are handled correctly, and a failure is reported instead of
# being ignored.
with open(args.output,"w") as stream:
    status = subprocess.call(["ocropus-hocr",book],stdout=stream)
if status!=0:
    sys.exit("ocropus-hocr failed with exit status %d"%status)
print("\noutput in %s \n"%args.output)
